Load libraries
library(magrittr)
library(forecast)
library(ggplot2)
library(tseries)
Read-in Data
# Read one hourly data:
DFHour <- read.csv("videoHourly02.csv",header=TRUE,sep=",")
# Calculate Percent Lost
DFHour$packtsPcnt <- (DFHour$pcktsLost/DFHour$pcktsCount)*100
DFAnalysis <- DFHour[,c(4,2,3,5)]
head(DFAnalysis)
## dteTime pcktsCount pcktsLost packtsPcnt
## 1 2017-01-08 00:00:00 986.58 6.12 0.6203248
## 2 2017-01-08 01:00:00 984.47 7.09 0.7201845
## 3 2017-01-08 02:00:00 984.64 6.62 0.6723269
## 4 2017-01-08 03:00:00 997.94 6.20 0.6212798
## 5 2017-01-08 04:00:00 987.24 5.38 0.5449536
## 6 2017-01-08 05:00:00 976.99 5.71 0.5844482
Convert to Time Series
pckt_data_full <- DFAnalysis$packtsPcnt%>%ts(start=c(0,1),frequency = 24)
# Keep only first 21 days
pckt_data <- pckt_data_full%>%subset.ts(end=21*24)
# Create Test Set for one day in the future
pckt_data_test <- pckt_data_full%>%subset.ts(start=21*24+1, end = 22*24)
pckt_data%>%autoplot() + ylab("Percent Packet Lost (%)") + xlab("Time (Days/Hours)")
adf.test(pckt_data)
## Warning in adf.test(pckt_data): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: pckt_data
## Dickey-Fuller = -7.8037, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
?kpss.test
kpss.test(pckt_data, null="Trend")
## Warning in kpss.test(pckt_data, null = "Trend"): p-value greater than
## printed p-value
##
## KPSS Test for Trend Stationarity
##
## data: pckt_data
## KPSS Trend = 0.029491, Truncation lag parameter = 5, p-value = 0.1
Explore sesonality of the data
pckt_data%>%ggsubseriesplot() + xlab("Hour of the day") + ylab("Percent Packet Lost (%)")
pckt_data%>%ggPacf() +ggtitle("Autocorrelation for Percent Package Loss Series")
pckt_data%>%Box.test(type = "Ljung")
##
## Box-Ljung test
##
## data: .
## X-squared = 46.655, df = 1, p-value = 8.466e-12
?Box.test
ggsubseriesplot(pckt_data)
ggseasonplot(pckt_data%>%subset(end=10*24))
library(TSA)
## Loading required package: leaps
## Loading required package: locfit
## locfit 1.5-9.1 2013-03-22
## Loading required package: mgcv
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:forecast':
##
## getResponse
## This is mgcv 1.8-15. For overview type 'help("mgcv-package")'.
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:forecast':
##
## fitted.Arima, plot.Arima
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
pdg <- periodogram(pckt_data)
pdg$spec%>%order
## [1] 154 76 73 184 93 120 182 5 92 115 181 125 157 253 200 130 197
## [18] 201 250 139 81 70 198 84 153 36 45 59 13 161 105 183 109 143
## [35] 244 104 138 169 171 159 237 62 148 117 82 9 137 4 205 74 188
## [52] 226 63 89 179 72 133 165 174 8 151 194 218 172 80 44 60 88
## [69] 202 224 68 248 208 212 167 106 251 94 46 203 99 170 163 97 209
## [86] 166 103 33 155 16 135 186 232 162 91 204 222 102 24 127 142 123
## [103] 229 134 61 195 131 246 221 118 219 199 25 77 95 238 26 243 101
## [120] 207 121 12 158 216 177 57 67 233 241 34 242 51 32 27 1 10
## [137] 50 113 71 65 191 254 156 152 247 140 83 17 69 178 187 96 245
## [154] 240 39 217 15 164 38 19 235 37 114 175 211 122 180 54 129 11
## [171] 116 228 234 252 236 173 35 110 215 220 146 87 75 90 239 136 160
## [188] 249 144 132 30 231 41 49 189 126 23 196 47 112 223 185 119 3
## [205] 100 107 78 124 230 206 66 55 29 2 40 56 52 256 210 108 53
## [222] 145 79 150 20 6 98 147 128 28 58 227 225 141 190 168 214 14
## [239] 176 111 48 193 18 192 255 86 42 31 43 213 22 149 64 7 85
## [256] 21
1/pdg$freq[21]
## [1] 24.38095
1/pdg$freq[85]
## [1] 6.023529
1/pdg$freq[7]
## [1] 73.14286
1/pdg$freq[64]
## [1] 8
fitnaive <- snaive(pckt_data)
fcnaive <- forecast(fitnaive,h=24)
fcnaive%>%autoplot() + ylab("Percent Packet Lost (%)") + autolayer(pckt_data_test, size=0.2)
accuracy(fcnaive,pckt_data_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.007383779 0.2914955 0.2311604 -6.039159 30.68733 1.000000
## Test set -0.145475086 0.3528613 0.2632881 -31.596214 43.99839 1.138985
## ACF1 Theil's U
## Training set 0.1871620 NA
## Test set 0.1325135 1.2661
checkresiduals(fitnaive)
##
## Ljung-Box test
##
## data: residuals
## Q* = 306.47, df = 48, p-value < 2.2e-16
##
## Model df: 0. Total lags used: 48
fitexp <- ets(pckt_data)
summary(fitexp)
## ETS(M,N,M)
##
## Call:
## ets(y = pckt_data)
##
## Smoothing parameters:
## alpha = 0.0756
## gamma = 1e-04
##
## Initial states:
## l = 0.6628
## s=1.0464 1.0166 0.9161 1.4182 1.2196 1.1259
## 0.8928 1.2173 1.3941 1.1501 1.31 1.1657 0.7631 0.6679 0.673 0.8169 0.7968 0.9682 0.8509 0.8214 0.815 0.9997 1.0404 0.9139
##
## sigma: 0.241
##
## AIC AICc BIC
## 1502.065 1505.242 1616.075
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.005692413 0.1988161 0.1563488 -5.167803 20.679 0.6763651
## ACF1
## Training set 0.02061415
?ets
fcets <- fitexp%>%forecast(h=24)
fcets%>%autoplot() +ylab("Percent Packet Lost (%)") + autolayer(pckt_data_test, size=0.2)
accuracy(fcets,pckt_data_test)
## ME RMSE MAE MPE MAPE
## Training set 0.005692413 0.1988161 0.1563488 -5.167803 20.67900
## Test set -0.146498059 0.2319476 0.1920514 -27.910633 32.14154
## MASE ACF1 Theil's U
## Training set 0.6763651 0.02061415 NA
## Test set 0.8308148 -0.00436959 1.046091
checkresiduals(fitexp)
##
## Ljung-Box test
##
## data: residuals
## Q* = 93.432, df = 22, p-value = 8.871e-11
##
## Model df: 26. Total lags used: 48
?Box.test
BoxCox.lambda(pckt_data)
## [1] -0.3036466
autoplot(pckt_data)
pckt_data%>%BoxCox(-0.303)%>%autoplot
ARIMA Models
arimafit <- auto.arima(pckt_data)
summary(arimafit)
## Series: pckt_data
## ARIMA(1,0,1)(2,0,0)[24] with non-zero mean
##
## Coefficients:
## ar1 ma1 sar1 sar2 mean
## 0.7348 -0.5507 0.2789 0.2204 0.8018
## s.e. 0.0909 0.1120 0.0435 0.0464 0.0320
##
## sigma^2 estimated as 0.05205: log likelihood=29.27
## AIC=-46.54 AICc=-46.37 BIC=-21.2
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.001963561 0.2270144 0.1780557 -7.828116 23.97321 0.7702693
## ACF1
## Training set -0.01080923
checkresiduals(arimafit)
##
## Ljung-Box test
##
## data: residuals
## Q* = 77.309, df = 43, p-value = 0.001028
##
## Model df: 5. Total lags used: 48
fcarima<- arimafit%>% forecast(h=24)
fcarima%>%autoplot() + ylab("Percent Packet Lost (%)") + autolayer(pckt_data_test, size=0.2)
accuracy(fcarima, pckt_data_test)
## ME RMSE MAE MPE MAPE
## Training set 0.001963561 0.2270144 0.1780557 -7.828116 23.97321
## Test set -0.115330883 0.2503180 0.1998090 -27.143646 34.65398
## MASE ACF1 Theil's U
## Training set 0.7702693 -0.01080923 NA
## Test set 0.8643741 0.15197010 1.051514
fitbats <- tbats(pckt_data)
fctbats <- forecast(fitbats, h=24)
fctbats%>%autoplot() + ylab("Percent Packet Lost (%)") + autolayer(pckt_data_test, size=0.2)
accuracy(f = fctbats, pckt_data_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.02633079 0.2050179 0.1574748 -2.629472 20.20505 0.6812360
## Test set -0.11678586 0.1989356 0.1582125 -23.103819 27.08748 0.6844277
## ACF1 Theil's U
## Training set -0.02784862 NA
## Test set 0.03971624 0.8779549
checkresiduals(fitbats)
##
## Ljung-Box test
##
## data: residuals
## Q* = 109.82, df = 29, p-value = 2.474e-11
##
## Model df: 19. Total lags used: 48